---
title: "Fibonacci and k-Subsecting Recursive Feature Elimination: Supplement"
output: 
  html_notebook: 
    code_folding: hide
    number_sections: yes
    toc: yes
    toc_depth: 2
---

```{r libraries, message=FALSE, warning=FALSE}
library(knitr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(DT)
library(ggthemes)
library(PMCMR)
library(scmamp)

knitr::opts_chunk$set(dpi = 96, results = "asis")
# Print inline numbers with a thousands separator.
knitr::knit_hooks$set(inline = function(x) {
  prettyNum(x, big.mark = ",")
})
SAVE_EPS <- FALSE
```

```{r functions, warning=FALSE, message=FALSE}
# Interactive table with column filters and copy/csv/excel/pdf/print export;
# the listed columns are rounded to round_digits.
prettyTable <- function(table_df, round_columns = numeric(), round_digits = 3) {
    DT::datatable(table_df, style = "bootstrap", filter = "top", rownames = FALSE, extensions = "Buttons",
                  options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>%
    formatRound(round_columns, round_digits)
}

# reportFriedman(): for one classifier and one metric, aggregates the raw
# results, runs a Friedman rank sum test across the eight feature selectors,
# prints mean ranks and paired one-sided Wilcoxon signed rank tests (each
# subsecting selector vs its standard RFE counterpart), and returns a one-row
# data frame with the mean ranks and the Friedman p-value.
reportFriedman <- function(df, classifier, metric, metric_function, metric_direction=1, make_plot = T){
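    # Aggregate per-fold results to one value per (dataset, selector) with
    # metric_function (mean, or sd), then reshape into a datasets x selectors
    # matrix.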
    averages = data.frame(df) %>%
        select(-c(Attributes, Number.of.classes, Min.class.examples, Max.class.examples)) %>%
        group_by(Dataset, Classifier, Feature.selector) %>%
        summarise_all(metric_function, na.rm=T) %>%
        data.frame() %>%
        filter(Feature.selector != 'All', Classifier == classifier) %>%
        select_("Dataset", "Feature.selector", metric) %>%
        spread("Feature.selector", metric) %>%
        select(-Dataset) %>%
        select(`FRFE`, `RFE-log`, `3-SRFE`, `RFE-log-3`, `5-SRFE`, `RFE-log-5`, `10-SRFE`, `RFE-log-10`) %>%
        data.matrix()
    
    if (metric_direction == 1) {
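      # Higher is better (e.g. accuracy): rank the negated values so that
      # rank 1 is best; run the tests on the raw values.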
      averages_r = -averages
      averages_t = averages
    } else {
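      # Lower is better (e.g. processing time): rank the raw values; negate
      # them for the one-sided tests so that "greater" still means "the
      # subsecting variant is better".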
      averages_r = averages
      averages_t = -averages
    }
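    # Rank the selectors within each dataset (row-wise), then order the
    # columns by mean rank.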
    ranks <- t(apply(averages_r, 1, rank))
    ranks <- ranks[,order(colMeans(ranks, na.rm=TRUE))]
    cat("<hr><strong>Friedman rank sum test</strong><br />")
    fTest <- friedman.test(averages_t)
    testResult <- capture.output(print(fTest))
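    # Line 5 of the printed htest object is the statistic line:
    # "Friedman chi-squared = ..., df = ..., p-value = ...".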
    cat(testResult[5])
    cat("\r\n")
    mean_ranks <- t(colMeans(ranks, na.rm=TRUE))
    print(kable(mean_ranks, digits = 2))
    cat("\r\n")
    
    w_df <- data.frame(Subsecting = c("FRFE", "3-SRFE", "5-SRFE", "10-SRFE"),
                       Standard = c("RFE-log", "RFE-log-3", "RFE-log-5", "RFE-log-10"), 
                       p.value = c(-1, -1,-1, -1)) %>%
      mutate(Hypothesis = paste0(Subsecting, " vs ", Standard))
    cat("<hr><strong>Wilcoxon signed rank test</strong><br />")
    for (r in 1:nrow(w_df)){
      wTest <- wilcox.test(averages_t[,as.character(w_df[r, "Subsecting"])],
                           averages_t[,as.character(w_df[r, "Standard"])],
                           paired=TRUE, alternative = "g")
      w_df[r, "p.value"] <- wTest$p.value
    }
    print(kable(w_df %>% select(Hypothesis, p.value), digits = 3))
    cat("\r\n")
    
    
    
    if (make_plot){
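        # Save a critical-difference (CD) plot of the mean ranks to EPS
        # (scmamp::plotCD).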
        setEPS()
        par(mar=c(0,0,0,0))
        postscript(paste0("images/", metric, "_", classifier, "_",  "Friedman.eps"), width = 7, height = 3.4)
        plotCD(results.matrix = averages_t, alpha = 0.05, cex = 1.1)
        dev.off()
    }
    cat("<hr>")
    mean_ranks_df <- data.frame(
      `FRFE` = mean_ranks[1, "FRFE"], `RFE-log` = mean_ranks[1, "RFE-log"],
      `3-SRFE` = mean_ranks[1, "3-SRFE"], `RFE-log-3` = mean_ranks[1, "RFE-log-3"],
      `5-SRFE` = mean_ranks[1, "5-SRFE"], `RFE-log-5` = mean_ranks[1, "RFE-log-5"],
      `10-SRFE` = mean_ranks[1, "10-SRFE"], `RFE-log-10` = mean_ranks[1, "RFE-log-10"],
      `p-value` = fTest$p.value)
    mean_ranks_df
}
```
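
For reference, the sketch below applies the same rank-then-test steps as `reportFriedman()` to a small synthetic score matrix. The values are random and purely illustrative (higher is assumed better, as for accuracy), and the chunk is not evaluated:

```{r friedman toy, eval=FALSE}
# Rows play the role of datasets, columns the role of feature selectors.
set.seed(1)
toy <- matrix(runif(20), nrow = 5,
              dimnames = list(NULL, c("A", "B", "C", "D")))
toy_ranks <- t(apply(-toy, 1, rank))  # higher score -> better (lower) rank
colMeans(toy_ranks)                   # mean rank per selector
friedman.test(toy)                    # rows are blocks, columns are groups
```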

# Benchmark results

## Raw results

df <- read.csv("Benchmarks.csv", na.strings = c("?", "", "-")) %>%
    mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier), Selected.num = as.character(Selected.num)) %>%
    select(-c(Start.date, Selector.params, Scorer, Grid.scores, Selected.features))
df$Dataset <- substr(df$Dataset, 1, nchar(df$Dataset)-4)
df$Dataset <- as.factor(df$Dataset)
df$Classifier[startsWith(df$Classifier, "SVC")] <- "SVM"
df$Classifier[startsWith(df$Classifier, "Random")] <- "RF"
df$Classifier[startsWith(df$Classifier, "Logistic")] <- "LR"
df$Classifier[startsWith(df$Classifier, "LGBM")] <- "GBM"
df$Classifier <- as.factor(df$Classifier)
df$Selected.num[df$Selected.num == "error"] <- NA
df$Selected.num <- as.numeric(df$Selected.num)
prettyTable(df, c(9, 11:15))

## Datasets

```{r dataset summary}
datasets_df <- df %>%
  select(Dataset, Examples, Attributes, Number.of.classes, Min.class.examples, Max.class.examples) %>%
  distinct()

prettyTable(datasets_df)
```

## Mean cross-validation scores

```{r cv summary, warning=FALSE}
# Average the cross-validation folds for each dataset/classifier/selector triple.
cv_df <- df %>%
  select(-c(Attributes, Number.of.classes, Min.class.examples, Max.class.examples)) %>%
  group_by(Dataset, Classifier, Feature.selector) %>%
  summarise_all(mean, na.rm = TRUE)

prettyTable(cv_df, c(5, 8:12))
```

# Comparisons and statistical tests {.tabset}

```{r comparisons, results="asis", warning=FALSE, message=FALSE, error=FALSE}
classifiers <- as.character(unique(cv_df$Classifier))
metrics <- c("Selected.num", "Selected.num", "Accuracy", "Kappa", "Macro.recall", "G.mean", "Processing.time")
metric_functions <- c(mean, sd, mean, mean, mean, mean, mean)
metric_headers <- c("Number of selected features", "Standard deviation of number of selected features", "Accuracy", "Kappa", "Macro recall", "G-mean", "Processing time")
metric_direction <- c(-1, -1, 1, 1, 1, 1, -1)

friedman_df <- data.frame(Metric = character(), Classifier = character(),
                          `FRFE` = numeric(), `RFE-log` = numeric(),
                          `3-SRFE` = numeric(), `RFE-log-3` = numeric(),
                          `5-SRFE` = numeric(), `RFE-log-5` = numeric(),
                          `10-SRFE` = numeric(), `RFE-log-10` = numeric(),
                          `p-value` = numeric())

for (i in seq_along(metrics)){
  cat(paste0("## ", metric_headers[i], "\r\n\r\n"))
  for (classifier in classifiers){
    cat(paste0("### ", classifier, "\r\n\r\n"))
    mean_ranks <- reportFriedman(df, classifier, metrics[i], metric_functions[i], metric_direction[i], make_plot = FALSE)
    mean_df <- data.frame(Metric = metric_headers[i], Classifier = classifier,
                          `FRFE` = mean_ranks$FRFE, `RFE-log` = mean_ranks$RFE.log,
                          `3-SRFE` = mean_ranks$X3.SRFE, `RFE-log-3` = mean_ranks$RFE.log.3,
                          `5-SRFE` = mean_ranks$X5.SRFE, `RFE-log-5` = mean_ranks$RFE.log.5,
                          `10-SRFE` = mean_ranks$X10.SRFE, `RFE-log-10` = mean_ranks$RFE.log.10,
                          `p-value` = mean_ranks$p.value)
    friedman_df <- rbind(friedman_df, mean_df)
  }
}
```
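
Each "subsecting vs standard" row printed below comes from a paired, one-sided Wilcoxon signed rank test; the sign convention in `reportFriedman()` makes `alternative = "greater"` mean "the subsecting variant is better" for both metric directions. A minimal, non-evaluated sketch on made-up per-dataset scores (the vectors `x` and `y` are illustrative, not benchmark values):

```{r wilcoxon sketch, eval=FALSE}
# Hypothetical per-dataset scores for a subsecting selector (x) and its
# standard RFE counterpart (y), paired by dataset.
x <- c(0.81, 0.77, 0.92, 0.68, 0.73)
y <- c(0.80, 0.74, 0.87, 0.70, 0.66)
wilcox.test(x, y, paired = TRUE, alternative = "greater")
```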

## Number of selected features

### GBM

**Friedman rank sum test:** Friedman chi-squared = 43.604, df = 7, p-value = 2.549e-07

Mean ranks:

| 3-SRFE | 5-SRFE | FRFE | 10-SRFE | RFE-log | RFE-log-3 | RFE-log-5 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.02 | 3.7 | 3.75 | 3.82 | 5.25 | 5.29 | 5.54 | 5.64 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.003 |
| 3-SRFE vs RFE-log-3 | 0.000 |
| 5-SRFE vs RFE-log-5 | 0.002 |
| 10-SRFE vs RFE-log-10 | 0.003 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 49.037, df = 7, p-value = 2.231e-08

Mean ranks:

| 10-SRFE | 3-SRFE | 5-SRFE | RFE-log-10 | FRFE | RFE-log-3 | RFE-log-5 | RFE-log |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 2.39 | 3.61 | 3.96 | 4.3 | 5.14 | 5.32 | 5.36 | 5.91 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.048 |
| 3-SRFE vs RFE-log-3 | 0.011 |
| 5-SRFE vs RFE-log-5 | 0.023 |
| 10-SRFE vs RFE-log-10 | 0.002 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 30.123, df = 7, p-value = 9.013e-05

Mean ranks:

| 10-SRFE | 3-SRFE | 5-SRFE | RFE-log-5 | RFE-log | RFE-log-10 | FRFE | RFE-log-3 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.29 | 3.43 | 3.71 | 4.46 | 4.96 | 5.18 | 5.39 | 5.57 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.866 |
| 3-SRFE vs RFE-log-3 | 0.007 |
| 5-SRFE vs RFE-log-5 | 0.045 |
| 10-SRFE vs RFE-log-10 | 0.008 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 37.86, df = 7, p-value = 3.221e-06

Mean ranks:

| 3-SRFE | 10-SRFE | 5-SRFE | RFE-log-5 | FRFE | RFE-log-3 | RFE-log-10 | RFE-log |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.2 | 3.21 | 4 | 4.41 | 5.04 | 5.11 | 5.18 | 5.86 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.023 |
| 3-SRFE vs RFE-log-3 | 0.010 |
| 5-SRFE vs RFE-log-5 | 0.035 |
| 10-SRFE vs RFE-log-10 | 0.000 |

## Standard deviation of number of selected features

### GBM

**Friedman rank sum test:** Friedman chi-squared = 9.0074, df = 7, p-value = 0.2521

Mean ranks:

| FRFE | RFE-log | 3-SRFE | 5-SRFE | 10-SRFE | RFE-log-3 | RFE-log-5 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.07 | 3.88 | 3.88 | 4.38 | 4.75 | 4.79 | 5.61 | 5.66 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.212 |
| 3-SRFE vs RFE-log-3 | 0.041 |
| 5-SRFE vs RFE-log-5 | 0.041 |
| 10-SRFE vs RFE-log-10 | 0.045 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 30.955, df = 7, p-value = 6.337e-05

Mean ranks:

| 10-SRFE | 5-SRFE | 3-SRFE | FRFE | RFE-log-5 | RFE-log | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.32 | 3.61 | 3.64 | 3.93 | 5.14 | 5.16 | 5.43 | 5.77 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.022 |
| 3-SRFE vs RFE-log-3 | 0.005 |
| 5-SRFE vs RFE-log-5 | 0.009 |
| 10-SRFE vs RFE-log-10 | 0.000 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 24.119, df = 7, p-value = 0.001086

Mean ranks:

| FRFE | RFE-log-10 | RFE-log | RFE-log-5 | RFE-log-3 | 5-SRFE | 10-SRFE | 3-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.46 | 3.88 | 4.09 | 4.11 | 4.29 | 4.64 | 5.61 | 5.93 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.140 |
| 3-SRFE vs RFE-log-3 | 0.929 |
| 5-SRFE vs RFE-log-5 | 0.786 |
| 10-SRFE vs RFE-log-10 | 0.997 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 34.667, df = 7, p-value = 1.292e-05

Mean ranks:

| FRFE | 10-SRFE | 3-SRFE | 5-SRFE | RFE-log-10 | RFE-log | RFE-log-5 | RFE-log-3 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.18 | 3.43 | 3.75 | 4 | 5.07 | 5.39 | 5.57 | 5.61 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.000 |
| 3-SRFE vs RFE-log-3 | 0.002 |
| 5-SRFE vs RFE-log-5 | 0.004 |
| 10-SRFE vs RFE-log-10 | 0.000 |

## Accuracy

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 7.8272, df = 7, p-value = 0.3481

Mean ranks:

| RFE-log-3 | RFE-log-5 | RFE-log | RFE-log-10 | FRFE | 3-SRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.77 | 4.23 | 4.36 | 4.39 | 4.64 | 4.79 | 4.79 | 5.04 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.778 |
| 3-SRFE vs RFE-log-3 | 0.997 |
| 5-SRFE vs RFE-log-5 | 0.886 |
| 10-SRFE vs RFE-log-10 | 0.481 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 3.2789, df = 7, p-value = 0.8581

Mean ranks:

| RFE-log-10 | 3-SRFE | FRFE | 5-SRFE | RFE-log-5 | 10-SRFE | RFE-log | RFE-log-3 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.11 | 4.27 | 4.3 | 4.32 | 4.55 | 4.79 | 4.82 | 4.84 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.434 |
| 3-SRFE vs RFE-log-3 | 0.623 |
| 5-SRFE vs RFE-log-5 | 0.530 |
| 10-SRFE vs RFE-log-10 | 0.841 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 4.9125, df = 7, p-value = 0.6706

Mean ranks:

| 3-SRFE | 5-SRFE | RFE-log-10 | RFE-log-3 | FRFE | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.04 | 4.05 | 4.21 | 4.41 | 4.66 | 4.75 | 4.91 | 4.96 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.457 |
| 3-SRFE vs RFE-log-3 | 0.415 |
| 5-SRFE vs RFE-log-5 | 0.092 |
| 10-SRFE vs RFE-log-10 | 0.928 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 15.172, df = 7, p-value = 0.03385

Mean ranks:

| RFE-log | RFE-log-5 | RFE-log-10 | RFE-log-3 | 3-SRFE | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.82 | 4 | 4.02 | 4.34 | 4.45 | 4.64 | 5.14 | 5.59 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.928 |
| 3-SRFE vs RFE-log-3 | 0.847 |
| 5-SRFE vs RFE-log-5 | 0.995 |
| 10-SRFE vs RFE-log-10 | 0.839 |

## Kappa

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 8.0687, df = 7, p-value = 0.3266

Mean ranks:

| RFE-log-3 | RFE-log-5 | RFE-log-10 | RFE-log | FRFE | 10-SRFE | 3-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.77 | 4.27 | 4.32 | 4.34 | 4.61 | 4.8 | 4.89 | 5 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.823 |
| 3-SRFE vs RFE-log-3 | 0.997 |
| 5-SRFE vs RFE-log-5 | 0.911 |
| 10-SRFE vs RFE-log-10 | 0.444 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 3.1857, df = 7, p-value = 0.8673

Mean ranks:

| RFE-log-10 | 3-SRFE | 5-SRFE | FRFE | RFE-log-5 | RFE-log | RFE-log-3 | 10-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.11 | 4.29 | 4.3 | 4.38 | 4.46 | 4.82 | 4.82 | 4.82 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.536 |
| 3-SRFE vs RFE-log-3 | 0.618 |
| 5-SRFE vs RFE-log-5 | 0.518 |
| 10-SRFE vs RFE-log-10 | 0.823 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 5.2849, df = 7, p-value = 0.6252

Mean ranks:

| 5-SRFE | RFE-log-10 | 3-SRFE | RFE-log-3 | FRFE | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.07 | 4.11 | 4.12 | 4.34 | 4.64 | 4.73 | 4.98 | 5 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.520 |
| 3-SRFE vs RFE-log-3 | 0.424 |
| 5-SRFE vs RFE-log-5 | 0.136 |
| 10-SRFE vs RFE-log-10 | 0.913 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 13.647, df = 7, p-value = 0.05784

Mean ranks:

| RFE-log | RFE-log-10 | RFE-log-5 | RFE-log-3 | 3-SRFE | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.86 | 3.95 | 4 | 4.43 | 4.52 | 4.66 | 5.07 | 5.52 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.938 |
| 3-SRFE vs RFE-log-3 | 0.835 |
| 5-SRFE vs RFE-log-5 | 0.994 |
| 10-SRFE vs RFE-log-10 | 0.855 |

## Macro recall

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 6.6426, df = 7, p-value = 0.467

Mean ranks:

| RFE-log-3 | RFE-log-5 | RFE-log | RFE-log-10 | FRFE | 10-SRFE | 3-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.84 | 4.27 | 4.43 | 4.43 | 4.54 | 4.57 | 4.89 | 5.04 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.649 |
| 3-SRFE vs RFE-log-3 | 0.998 |
| 5-SRFE vs RFE-log-5 | 0.924 |
| 10-SRFE vs RFE-log-10 | 0.594 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 4.0346, df = 7, p-value = 0.7758

Mean ranks:

| 3-SRFE | RFE-log-10 | FRFE | RFE-log-5 | 5-SRFE | RFE-log-3 | 10-SRFE | RFE-log |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.18 | 4.18 | 4.29 | 4.34 | 4.39 | 4.86 | 4.88 | 4.89 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.422 |
| 3-SRFE vs RFE-log-3 | 0.540 |
| 5-SRFE vs RFE-log-5 | 0.552 |
| 10-SRFE vs RFE-log-10 | 0.726 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 6.6782, df = 7, p-value = 0.4631

Mean ranks:

| 5-SRFE | 3-SRFE | RFE-log-10 | RFE-log-3 | FRFE | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.86 | 4.11 | 4.12 | 4.41 | 4.68 | 4.88 | 4.96 | 4.98 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.390 |
| 3-SRFE vs RFE-log-3 | 0.385 |
| 5-SRFE vs RFE-log-5 | 0.094 |
| 10-SRFE vs RFE-log-10 | 0.960 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 14.459, df = 7, p-value = 0.0436

Mean ranks:

| RFE-log | RFE-log-10 | RFE-log-5 | RFE-log-3 | 3-SRFE | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.82 | 3.98 | 4.12 | 4.27 | 4.43 | 4.71 | 5.11 | 5.55 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.930 |
| 3-SRFE vs RFE-log-3 | 0.859 |
| 5-SRFE vs RFE-log-5 | 0.994 |
| 10-SRFE vs RFE-log-10 | 0.812 |

## G-mean

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 6.9734, df = 7, p-value = 0.4317

Mean ranks:

| RFE-log-3 | RFE-log-10 | RFE-log-5 | FRFE | RFE-log | 10-SRFE | 5-SRFE | 3-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.77 | 4.32 | 4.41 | 4.48 | 4.52 | 4.61 | 4.86 | 5.04 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.596 |
| 3-SRFE vs RFE-log-3 | 0.998 |
| 5-SRFE vs RFE-log-5 | 0.807 |
| 10-SRFE vs RFE-log-10 | 0.831 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 4.5779, df = 7, p-value = 0.7113

Mean ranks:

| RFE-log-10 | FRFE | RFE-log-5 | 5-SRFE | 3-SRFE | RFE-log-3 | RFE-log | 10-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.12 | 4.27 | 4.27 | 4.29 | 4.34 | 4.84 | 4.89 | 4.98 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.177 |
| 3-SRFE vs RFE-log-3 | 0.554 |
| 5-SRFE vs RFE-log-5 | 0.470 |
| 10-SRFE vs RFE-log-10 | 0.917 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 9.725, df = 7, p-value = 0.2047

Mean ranks:

| 5-SRFE | RFE-log-10 | 3-SRFE | FRFE | RFE-log-3 | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.71 | 4.09 | 4.21 | 4.36 | 4.59 | 4.61 | 5.07 | 5.36 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.247 |
| 3-SRFE vs RFE-log-3 | 0.251 |
| 5-SRFE vs RFE-log-5 | 0.013 |
| 10-SRFE vs RFE-log-10 | 0.987 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 12.58, df = 7, p-value = 0.08302

Mean ranks:

| RFE-log | RFE-log-10 | RFE-log-5 | 3-SRFE | RFE-log-3 | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.79 | 4.12 | 4.25 | 4.27 | 4.34 | 4.57 | 5.21 | 5.45 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.876 |
| 3-SRFE vs RFE-log-3 | 0.610 |
| 5-SRFE vs RFE-log-5 | 0.962 |
| 10-SRFE vs RFE-log-10 | 0.762 |

## Processing time

*R could not compute exact Wilcoxon p-values for this metric (zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 92.266, df = 7, p-value < 2.2e-16

Mean ranks:

| FRFE | RFE-log | 5-SRFE | 3-SRFE | RFE-log-5 | 10-SRFE | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 2.18 | 2.5 | 4.43 | 4.71 | 4.73 | 5.09 | 5.96 | 6.39 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.500 |
| 3-SRFE vs RFE-log-3 | 0.019 |
| 5-SRFE vs RFE-log-5 | 0.040 |
| 10-SRFE vs RFE-log-10 | 0.008 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 90.481, df = 7, p-value < 2.2e-16

Mean ranks:

| FRFE | RFE-log | 5-SRFE | RFE-log-5 | 10-SRFE | 3-SRFE | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 1.91 | 2.41 | 4.02 | 4.91 | 5.09 | 5.23 | 5.73 | 6.7 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.524 |
| 3-SRFE vs RFE-log-3 | 0.346 |
| 5-SRFE vs RFE-log-5 | 0.232 |
| 10-SRFE vs RFE-log-10 | 0.113 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 152.58, df = 7, p-value < 2.2e-16

Mean ranks:

| RFE-log | FRFE | RFE-log-5 | RFE-log-3 | 5-SRFE | RFE-log-10 | 10-SRFE | 3-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 1.52 | 1.84 | 3.45 | 4.66 | 4.91 | 5.55 | 6.66 | 7.41 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.978 |
| 3-SRFE vs RFE-log-3 | 1.000 |
| 5-SRFE vs RFE-log-5 | 1.000 |
| 10-SRFE vs RFE-log-10 | 0.981 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 92.818, df = 7, p-value < 2.2e-16

Mean ranks:

| FRFE | RFE-log | 5-SRFE | RFE-log-5 | 3-SRFE | 10-SRFE | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 1.88 | 2.2 | 4.39 | 4.86 | 5.21 | 5.25 | 5.71 | 6.5 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.627 |
| 3-SRFE vs RFE-log-3 | 0.530 |
| 5-SRFE vs RFE-log-5 | 0.352 |
| 10-SRFE vs RFE-log-10 | 0.223 |

## Friedman tests summary

```{r friedmanSummary}
prettyTable(friedman_df, 3:ncol(friedman_df), 3)
```

# Feature selection plots

```{r feature plots}
# read.csv() decompresses .gz files transparently, so no explicit gzfile()
# connection is needed.
df_grid <- read.csv("GridScores.csv.gz", na.strings = c("?", "", "-")) %>% 
  mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier))

# Drop the 4-character file extension from dataset names.
df_grid$Dataset <- substr(df_grid$Dataset, 1, nchar(df_grid$Dataset) - 4)
df_grid$Dataset <- as.factor(df_grid$Dataset)

df_grid$Classifier[startsWith(df_grid$Classifier, "SVC")] <- "SVM"
df_grid$Classifier[startsWith(df_grid$Classifier, "Random")] <- "RF"
df_grid$Classifier[startsWith(df_grid$Classifier, "Logistic")] <- "LR"
df_grid$Classifier[startsWith(df_grid$Classifier, "LGBM")] <- "GBM"
df_grid$Classifier <- as.factor(df_grid$Classifier)

# Reorder levels: FRFE, 3-SRFE, 5-SRFE, 10-SRFE, then the RFE-log variants.
df_grid$Feature.selector <- factor(df_grid$Feature.selector, levels(df_grid$Feature.selector)[c(4,2,3,1,5,7,8,6)])

df_grid$Type <- as.character(df_grid$Feature.selector)
df_grid$Type[df_grid$Type %in% c("FRFE", "3-SRFE", "5-SRFE", "10-SRFE")] <- "FRFE/k-SRFE"
df_grid$Type[df_grid$Type != "FRFE/k-SRFE"] <- "RFE"
df_grid$Type <- as.factor(df_grid$Type)

cbPalette <- c("#a6cee3", "#b2df8a", "#fb9a99", "#fdbf6f", "#1f78b4", "#33a02c", "#e31a1c", "#ff7f00")

for (dataset in unique(df_grid$Dataset)){
  plot_df <- df_grid %>% filter(Dataset == dataset, Fold == 0)
  p <- ggplot(plot_df, aes(Feature.num, Accuracy, color = Feature.selector)) +
    geom_point(alpha = 0.6) +
    facet_grid(Classifier ~ Type) +
    theme_bw() +
    scale_color_manual(values = cbPalette, name = "Selector") +
    xlab("Number of features")
  ggsave(paste0("images/", dataset, ".svg"), plot = p, dpi = 300, width = 8, height = 4, units = "in")
  ggsave(paste0("images/", dataset, ".png"), plot = p, dpi = 300, width = 8, height = 4, units = "in")
  print(p + ggtitle(dataset))
}
```
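
A note on the loading step above: the second argument of `gzfile()` is the connection's *open mode*, so a call like `gzfile("GridScores.csv.gz", "GridScores.csv")` requests a nonsensical mode and produces "seek on a gzfile connection returned an internal error" warnings plus unclosed-connection warnings. Because `read.csv()` handles gzip compression transparently, the `.gz` path can be passed directly; a non-evaluated equivalence check:

```{r gz loading, eval=FALSE}
# Two equivalent ways to read the compressed grid scores.
a <- read.csv("GridScores.csv.gz", na.strings = c("?", "", "-"))           # transparent decompression
b <- read.csv(gzfile("GridScores.csv.gz"), na.strings = c("?", "", "-"))   # explicit connection
identical(a, b)
```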


# Case study

## Raw results

```{r case_study}
case_df <- read.csv("CaseStudy.csv", na.strings = c("?", "", "-")) %>%
    mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier), Selected.num = as.character(Selected.num)) %>%
    select(-c(Start.date, Selector.params, Scorer, Grid.scores, Selected.features))

# Strip the file-name suffix from dataset names.
case_df$Dataset <- substr(case_df$Dataset, 1, nchar(case_df$Dataset) - 9)
case_df$Dataset <- as.factor(case_df$Dataset)

case_df$Classifier[startsWith(case_df$Classifier, "SVC")] <- "SVM"
case_df$Classifier[startsWith(case_df$Classifier, "Random")] <- "RF"
case_df$Classifier[startsWith(case_df$Classifier, "Logistic")] <- "LR"
case_df$Classifier[startsWith(case_df$Classifier, "LGBM")] <- "GBM"
case_df$Classifier <- as.factor(case_df$Classifier)

# Failed runs are marked "error"; treat them as missing.
case_df$Selected.num[case_df$Selected.num == "error"] <- NA
case_df$Selected.num <- as.numeric(case_df$Selected.num)

prettyTable(case_df, c(9, 11:15))
```

## Mean cross-validation scores

```{r case cv summary, warning=FALSE}
case_cv_df <- case_df %>%
  select(-c(Attributes, Number.of.classes, Min.class.examples, Max.class.examples)) %>%
  group_by(Dataset, Classifier, Feature.selector) %>%
  summarise_all(mean, na.rm = TRUE)

prettyTable(case_cv_df, c(5, 8:12))
```

## Common features

```{r case common features, warning=FALSE}
scores_df <- read.csv("KappaScores.csv", na.strings = c("?", "", "-")) %>%
    mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier))

scores_df$Dataset <- substr(scores_df$Dataset, 1, nchar(scores_df$Dataset) - 9)
scores_df$Dataset <- as.factor(scores_df$Dataset)

scores_df$Classifier[startsWith(scores_df$Classifier, "SVC")] <- "SVM"
scores_df$Classifier[startsWith(scores_df$Classifier, "Random")] <- "RF"
scores_df$Classifier[startsWith(scores_df$Classifier, "Logistic")] <- "LR"
scores_df$Classifier[startsWith(scores_df$Classifier, "LGBM")] <- "GBM"
scores_df$Classifier <- as.factor(scores_df$Classifier)

prettyTable(scores_df, 5)
```